Notes to this new file: (March 13, 2024)

  • This file builds on the file “Xu_2023_v4.0_AllfirmsPorts.Rmd” and the replication file “Hongyi R conversion (reconciliation) 2.html” (from Dean).

  • The purpose of this replication/update file is to reconcile the results from the above two files.

  • Currently, the treatment of negative earnings is removing all negative earnings.

  • Now the whole universe is the CRSP: NYSE + AMEX + NASDAQ.

  • Good material on generating tabsets for plots and results in RMarkdown.

  • For calculating the \(\bar{ge}\), I now update the formula.

  • Variable freq now can be used to get results with different frequencies. Meanwhile, the results in the reply file to the AE and the table in the main paper should be updated.

Data cleaning and Preparation

timestamp()
## ##------ Thu Mar 28 09:00:11 2024 ------##
# 0. record datasets ----
## 0.1 initial value setup ----
# Global run parameters shared by every chunk below.
freq <- 4 # data frequency: 12 = monthly, 4 = quarterly, 1 = annual
start.ym <- as.yearmon(1966) - 1/12 # sample start (Dec 1965)
end.ym <- as.yearmon(2020)          # sample end (used as an exclusive bound below)
negativevalue <- 'remove'           # treatment applied to negative earnings observations
if (negativevalue == 'remove') {
  cat("Negative earnings are deleted.")
}
## Negative earnings are deleted.
# Return the calendar month names observed at the chosen data frequency.
#
# Args:
#   freq: 12 (monthly), 4 (quarterly) or 1 (annual). Any other value errors.
# Returns: a character vector of English month names.
#
# Note: the previous default `freq = freq` was self-referential and raised
# "promise already under evaluation" when called without an argument; all
# call sites pass `freq` explicitly, so it is now simply required.
month_select <- function(freq) {
  switch(as.character(freq),
    "12" = month.name, # all twelve months (base constant, same vector as before)
    "4"  = c("March", "June", "September", "December"), # quarter-end months
    "1"  = "December", # year-end month
    stop("`freq` must be 12, 4 or 1, got: ", freq, call. = FALSE)
    # return(months(seq(from = as.yearmon(1966) + 1/12, by = 1/4, length.out = 4)))
    # return(months(as.yearmon(1966) + 6/12)) # > July observations
  )
}
# Human-readable label for a data frequency (used in table subtitles).
#
# Args:
#   freq: 12, 4 or 1. Any other value errors (previously returned NULL silently).
# Returns: "monthly", "quarterly" or "annual".
#
# Note: the previous default `freq = freq` was self-referential and errored
# when the function was called without an argument; callers always pass freq.
freq_name <- function(freq) {
  switch(as.character(freq),
    "12" = "monthly",
    "4"  = "quarterly",
    "1"  = "annual",
    stop("`freq` must be 12, 4 or 1, got: ", freq, call. = FALSE)
  )
}

## 0.2 preliminary data cleaning ---- 
# Echo which calendar months survive the frequency filter below.
cat(paste("Selected month:", paste(month_select(freq = freq), collapse = ", ")), sep = "")
## Selected month: March, June, September, December
# "Variables_DK_CRSP_monthly_196601_201912.csv"
# "PredictorDataDK_CRSPm.csv"
# "PredictorDataDK_monthly_all.csv" - DK replication 
# Market series: monthly CRSP value-weighted data, aggregated down to `freq`.
port_market <- read.csv("/Users/hongyixu/Library/CloudStorage/OneDrive-HandelshögskolaniStockholm/Xu_Katselas_Drienko._2021/R&R_JEF_PredictorDataDk_Dec3_2023/Data_Mar20_2024/PredictorDataDK_CRSPm.csv") %>% 
  as_tibble() %>% 
  mutate(month = as.yearmon(as.character(yyyymm), format = "%Y%m"), 
         # Right-aligned rolling sum of 12/freq monthly log returns gives one
         # log return per period; leading values are NA-padded.
         vwret = rollsumr(log(1+vwret_DK), 12/freq, NA) ) %>% # converted into the log returns. 
  filter(if (negativevalue == 'remove') E12 >= 0 else TRUE) %>% 
  # E12 is scaled by 12/freq. The 0.001 branch for negative earnings is
  # unreachable when negativevalue == 'remove' (already filtered above); it is
  # kept for the 'truncate' treatment sketched in the Python comment below.
  mutate(E12 = ((E12*(E12 >= 0) + 0.001*(E12 < 0))) * 12/freq ) %>% 
  filter(months(month) %in% month_select(freq = freq)) %>% 
  filter(month < end.ym) 

# Quick visual sanity check of the earnings series against the zero line.
plot(y = port_market$E12, x = port_market$month, type = "b"); abline(h = 0, col = 'red', lty = 2)

    # Python sketch of the alternative 'truncate' treatment (not implemented here):
    #elif negative == 'truncate':
    #    df['E12'] = np.where(df['E12'] < 0, 0.001, df['E12'])

# Persist the cleaned market series; it is re-read by the loops below.
write.csv(x = port_market, file = "market_Allfirms.csv")
# port_market_raw <- port_market %>% select(month, vwret, R, date, GM, GE, DP)

# Portfolio panel: one stacked file with a `port` identifier column.
ports_all <- read.csv("/Users/hongyixu/Library/CloudStorage/OneDrive-HandelshögskolaniStockholm/Xu_Katselas_Drienko._2021/R&R_JEF_PredictorDataDk_Dec3_2023/PredictorDataDK_port_CRSPm.csv") %>% 
  # PredictorDataDK_port_CRSPm.csv
  # PredictorDataDK_szbm_CRSPm.csv
  as_tibble() 

ports <- unique(ports_all$port) # identifiers for portfolios
# Apply the same cleaning as for the market series to each portfolio,
# then write one CSV per portfolio ("port_<id>.csv") for the analysis loops.
for (p in ports) {
  ports.dt <- ports_all %>%
    filter(port == p) %>% 
    select(-port) %>% 
    mutate(month = as.yearmon(as.Date(date, format = "%Y-%m-%d"))) %>% 
    arrange(month) %>% 
    # Right-aligned rolling sum aggregates monthly log returns to `freq`.
    mutate(vwret = rollsumr(log(1+vwret), 12/freq, NA) ) %>% 
    filter(if (negativevalue == 'remove') E12 >= 0 else TRUE) %>% 
    # As above: the 0.001 negative-earnings branch is dead after the filter.
    mutate(E12 = ((E12*(E12 >= 0) + 0.001*(E12 < 0))) * 12/freq ) %>% 
    filter(months(month) %in% month_select(freq = freq)) %>% 
    filter(month < end.ym) 
  
  write.csv(x = ports.dt, file = paste("port_", p, ".csv", sep = ""))
}

## 0.3 name portfolios and predictors ----
# File names written by the cleaning steps above, discovered from the working
# directory (assumes no other files match these patterns).
market.names <- list.files(pattern = "market_")[1]
data.names <- list.files(pattern = "^port_") # data for portfolios
  ## generate the name of portfolios 
  ## Define the two sets
  #set1 <- c("B", "S")
  #set2 <- c("H", "M", "L")
  # Use expand.grid() to generate all combinations
  #combinations <- expand.grid(set2, set1)[, c(2, 1)]
  ## Convert the result to a vector
  # ports <- do.call(paste0, combinations) ## ----updated March 7, 2024 
# Display names: market first, then the portfolio identifiers from the data.
id.names <- c("Market", ports) # set plot names
ratio_names <- c("DP", "PE", "EY", "DY", "Payout") # potential predictors

## 0.4 risk-free rate ----- 
# Risk-free rate series (30-day T-bill), filtered to the same period-end
# months and sample window as the portfolio data.
RF <- read.csv("/Users/hongyixu/Library/CloudStorage/OneDrive-HandelshögskolaniStockholm/Xu_Katselas_Drienko._2021/R&R_JEF_PredictorDataDk_Dec3_2023/Rfree_t30.csv") %>% # record the risk free rate
  as_tibble() %>% # as the average of the bid and ask. (`as.tbl()` is deprecated in dplyr; behaves identically)
  select(-X) %>% # drop the CSV row-number column
  mutate(month = as.yearmon(month)) %>%
  filter(months(month) %in% month_select(freq = freq)) %>% # keep period-end months only
  filter(month < end.ym)

Figure 1 - Log Cumulative Index

Log cumulative realised portfolio return components for seven portfolios - the market portfolio and six size and book-to-market equity ratio sorted portfolios. All following figures demonstrate the quarterly realized price-earnings ratio growth (gm), earnings growth (ge), dividend-price (dp) and the portfolio return index (r) with the values in January 1966 as zero for all portfolios.

Log Cumulative Index

# TABLE-1. summary statistics ----
TABLE1.uni <- list() # the univariate statistics
TABLE1.cor <- list() # the correlation matrices
# Regular period grid: from the second in-sample observation to the last
# market-series month, stepping 1/freq years (yearmon arithmetic).
month.dt <- seq(filter(select(port_market, month), month >= start.ym)$month[2],
                tail(port_market$month, 1), 1/freq)
# head(month.dt)
# Empty frames keyed on the grid; one ratio column is joined in per portfolio.
DP.df <- EY.df <- PE.df <- data.frame(month = month.dt)

## (1*) summary tables for Summary stats & Correlations ----
# One pass per series: the market file plus the six portfolio files. Each pass
# fills TABLE1.uni / TABLE1.cor and draws one Figure-1 panel.
for (c in 1:7) {
  # `c` indexes both the file list and id.names; calls to c(...) still resolve
  # to base::c because R searches for a function object in call position.
  id <- c(market.names, data.names)[c]
  # print(id); print(id.names[c])
  
  ## 1. read the data ---- 
  data_nyse <- read.csv(id) %>%
    as_tibble() %>%
    mutate(month = as.yearmon(month)) %>% 
    filter(month >= start.ym) %>% # start from "Dec 1965" 
    select(month, r = vwret, P = Index, E = E12, D = D12, pe_raw = PE) %>% 
    # , R, GM_raw = GM, GE_raw = GE, DP_raw = DP
    mutate(DP = D / P, # these are adjusted by the log transformation
           PE = P / E,
           EP = E / P,
           EY = E / lag(P), # earnings yield
           DY = D / lag(P), # dividend yield
           Payout = D / E) # payout ratios
  
  # Collect this series' valuation ratios into the wide per-month frames.
  # NOTE(review): joining the same column name each pass yields suffixed
  # duplicates (.x/.y, ...) after the first iteration — confirm intended.
  PE.df <- PE.df %>% 
    left_join(select(data_nyse, month, PE), by = 'month')
  EY.df <- EY.df %>% 
    left_join(select(data_nyse, month, EY), by = 'month')
  DP.df <- DP.df %>% 
    left_join(select(data_nyse, month, DP), by = 'month')
  
  ## 2. return decomposition ----
  # Log decomposition of the period return into multiple expansion (gm),
  # earnings growth (ge) and a dividend-price term (dp).
  data_decompose <- data_nyse %>% 
    mutate(r = r, # cts returns = log total returns 
           gm = log(PE) - lag(log(PE)), # multiple expansion rate
           ge = log(E) - lag(log(E)), # earnings growth rate
           dp = log(1 + DP/freq)) %>% # only 1/12 of the dividends -----updated March 7, 2024 
    na.omit()
  
  ## 3. summary-Stat ----
  ar1.coef <- function(x) {
    return(as.numeric(lm(x ~ lag(x))$coefficients[2]))
  } # return the function value of the coefficient for the AR(1) model
  
  # Univariate moments; mean/sd/median/min/max scaled to percent,
  # skew/kurtosis left unscaled.
  comp_summary.dt <- data_decompose %>%
    select(gm, ge, dp, r) %>%
    # , R, GM_raw, GE_raw, DP_raw 
    describe() %>%
    mutate(mean = mean * 100,
           sd = sd * 100,
           median = median * 100,
           min = min * 100,
           max = max * 100) %>%
    select(Mean = mean, Median = median, SD = sd, Min = min, Max = max, Skew = skew, Kurt = kurtosis) %>%
    round(digits = 4)
  
  comp_summary.dt$"AR(1)" <- data_decompose %>%
    select(gm, ge, dp, r) %>%
    apply(2, ar1.coef) %>%
    round(digits = 4)
  
  ### Store the summary stat
  # print(paste("Data starts from ", first(data_decompose$month), " and ends in ", last(data_decompose$month), ".", sep = ""))
  TABLE1.uni[[id.names[c]]] <- comp_summary.dt
  
  ## 4. correlations ----
  comp_cor <- data_decompose %>% select(gm, ge, dp, r) %>% cor()
  TABLE1.cor[[id.names[c]]] <- comp_cor
  
  # Figure-1. cumulative realised return components ---- 
  cat('\n')
  cat('#### ', id.names[c], '   \n')
  cat('\n')
  
  # jpeg(filename = paste("Figure1_", id.names[c], ".jpeg", sep = ""), width = 550, height = 350)
  par(mar = c(2, 4, 2, 1)) 
  # Cumulative sums on the regular month grid; NAs are treated as zero inside
  # cumsum but reintroduced via `+ x*0` so gaps stay NA in the plot.
  cum_components.ts <- data.frame(month = month.dt) %>% 
    left_join(data_decompose, by = 'month') %>%
    select(r, gm, ge, dp) %>% 
    apply(2, FUN = function(x) cumsum(ifelse(is.na(x), 0, x) ) + x*0 ) %>% 
    ts(start = month.dt[1], frequency = freq)
  plot.ts(cum_components.ts, plot.type = "single", lty = 1:4, main = id.names[c], cex.main = 1, 
          xlab = NULL, ylab = "Cumulative Return and Components Indices")
  legend("topleft",
         legend = c("Total return", "Price earnings growth",
                    "Earnings growth", "Dividend price"),
         lty = 1:4,
         cex = 1.0) # text size
  # dev.off()
  par(mar = c(5, 4, 4, 2) + 0.1) 
  cat('\n')

} 

Market

BH

BL

BM

SH

SL

SM

write.csv(TABLE1.uni, file = "table_1.uni.csv")
write.csv(TABLE1.cor, file = "table_1.cor.csv")

.

Table 1 - Summary statistics of returns components

The correlations between gm and ge might be a bit too high compared to Ferreira and Santa-Clara (2011). Need to check the code again.

‘kable’ for Table Creation

## summary data for table1
# Long-format labels: component names repeated once per portfolio, and each
# portfolio name repeated once per component row.
comp_labels <- colnames(TABLE1.cor$Market)
port_labels <- names(TABLE1.uni)
rowname <- rep(comp_labels, length(port_labels))
portname <- rep(port_labels, each = length(comp_labels))

# Stack Panel A (univariate stats) beside Panel B (correlations), round to
# two decimals, then attach the label columns.
uni_stacked <- do.call(rbind.data.frame, TABLE1.uni)
cor_stacked <- do.call(rbind.data.frame, TABLE1.cor)
table1 <- round(cbind.data.frame(uni_stacked, cor_stacked), digits = 2) %>%
  cbind.data.frame(rowname, portname)

## give the table 1 outputs
# Render Table 1 with gt, grouping rows by portfolio. Note: the subtitle uses
# `data_decompose` left over from the last loop iteration above — TODO confirm
# all seven series share the same sample span.
gt(data = table1, rowname_col = "rowname", groupname_col = "portname") %>%
  tab_header(title = "Table 1 - Summary statistics of returns components",
             subtitle = paste(freq_name(freq = freq), " data starts from ", first(data_decompose$month), " and ends in ", last(data_decompose$month), ".", sep = "")) %>%
  # `vars()` is deprecated in gt's tab_spanner(); tidyselect c() is the
  # supported replacement and selects the same columns.
  tab_spanner(label = "Panel A: univariate statistics",
              columns = c(Mean, Median, SD, Min, Max, Skew, Kurt, "AR(1)")) %>%
  tab_spanner(label = "Panel B: Correlations",
              columns = c(gm, ge, dp, r)) %>%
  tab_source_note(source_note = html("Note: Panel A in this table presents mean, median, standard deviation (SD), minimum, maximum, skewness (Skew), kurtosis (kurt) and first-order autocorrelation coefficient of the realised components of stock market returns and six size and book-to-market equity ratio sorted portfolios. These univariate statistics for each portfolios are presented separately. gm is the continuously compounded growth rate in the price-earnings ratio. ge is the continuously compounded growth rate in earnings. dp is the log of one plus the dividend-price ratio. r is the portfolio returns. Panel B in this table reports correlation matrices for all seven portfolios. The sample period starts from February 1966 and ends in December 2019."))
Table 1 - Summary statistics of returns components
quarterly data starts from Mar 1966 and ends in Dec 2019.
Panel A: univariate statistics Panel B: Correlations
Mean Median SD Min Max Skew Kurt AR(1) gm ge dp r
Market
gm 0.09 0.19 18.03 -139.80 107.44 -1.17 23.91 0.10 1.00 -0.87 -0.03 0.43
ge 2.11 2.88 15.72 -81.38 146.15 2.24 38.77 0.21 -0.87 1.00 -0.03 0.06
dp 0.69 0.67 0.25 0.24 1.34 0.37 -0.41 0.96 -0.03 -0.03 1.00 -0.07
r 2.37 3.29 8.41 -28.39 22.16 -0.84 1.38 0.00 0.43 0.06 -0.07 1.00
BH
gm 0.23 1.34 23.30 -120.61 142.15 0.07 10.75 0.08 1.00 -0.92 0.01 0.28
ge 2.35 2.93 23.19 -160.98 138.02 -0.54 17.07 0.06 -0.92 1.00 -0.06 0.09
dp 1.01 1.00 0.42 0.33 2.06 0.30 -0.88 0.96 0.01 -0.06 1.00 -0.02
r 2.74 3.70 8.61 -31.22 29.58 -0.73 1.67 0.06 0.28 0.09 -0.02 1.00
BL
gm 0.11 0.27 20.79 -134.07 155.65 0.68 21.60 0.27 1.00 -0.89 -0.04 0.34
ge 2.05 2.61 19.96 -162.34 137.60 -1.26 30.70 0.32 -0.89 1.00 -0.02 0.11
dp 0.47 0.43 0.17 0.17 1.02 1.11 0.88 0.94 -0.04 -0.02 1.00 -0.11
r 2.18 2.87 9.16 -35.17 22.80 -0.81 1.54 0.02 0.34 0.11 -0.11 1.00
BM
gm 0.03 0.25 19.30 -131.39 147.27 0.59 24.80 0.19 1.00 -0.90 0.01 0.30
ge 2.07 3.10 18.93 -163.79 138.79 -1.43 39.44 0.26 -0.90 1.00 -0.07 0.12
dp 0.81 0.72 0.34 0.38 2.06 0.99 0.51 0.97 0.01 -0.07 1.00 -0.08
r 2.35 3.52 8.09 -27.52 21.35 -0.82 1.28 0.06 0.30 0.12 -0.08 1.00
SH
gm -0.58 -0.04 20.77 -119.74 142.09 0.64 14.81 0.21 1.00 -0.80 -0.08 0.45
ge 4.99 4.46 19.40 -163.79 138.98 -1.53 36.39 0.30 -0.80 1.00 -0.02 0.14
dp 0.81 0.82 0.32 0.17 2.01 0.41 0.38 0.80 -0.08 -0.02 1.00 -0.15
r 3.47 3.83 12.00 -39.21 40.26 -0.33 1.05 0.01 0.45 0.14 -0.15 1.00
SL
gm -0.42 -0.08 24.72 -130.40 154.99 0.33 10.35 0.08 1.00 -0.76 -0.10 0.61
ge 4.02 3.85 20.12 -163.39 138.06 -1.19 31.83 0.18 -0.76 1.00 0.02 0.02
dp 0.17 0.13 0.11 0.03 0.54 1.38 1.32 0.90 -0.10 0.02 1.00 -0.05
r 0.82 1.22 15.77 -50.03 41.07 -0.44 0.86 -0.02 0.61 0.02 -0.05 1.00
SM
gm -0.53 -0.94 20.86 -128.18 148.79 0.70 17.60 0.18 1.00 -0.81 -0.05 0.49
ge 4.33 4.13 18.81 -164.27 137.75 -1.71 40.95 0.27 -0.81 1.00 -0.03 0.10
dp 0.50 0.46 0.27 0.13 1.39 0.63 -0.49 0.91 -0.05 -0.03 1.00 -0.10
r 2.74 2.97 11.94 -36.56 37.43 -0.41 0.83 -0.01 0.49 0.10 -0.10 1.00
Note: Panel A in this table presents mean, median, standard deviation (SD), minimum, maximum, skewness (Skew), kurtosis (kurt) and first-order autocorrelation coefficient of the realised components of stock market returns and six size and book-to-market equity ratio sorted portfolios. These univariate statistics for each portfolio are presented separately. gm is the continuously compounded growth rate in the price-earnings ratio. ge is the continuously compounded growth rate in earnings. dp is the log of one plus the dividend-price ratio. r is the portfolio returns. Panel B in this table reports correlation matrices for all seven portfolios. The sample period starts from February 1966 and ends in December 2019.

Figure 3 - Cumulative OOS R-square Difference and Cumulative SSE Difference

Note:

  • “SOP_c” here is for the constrained SOP predictions.

Cumulative OOS R-square Difference and Cumulative SSE Difference

# TABLE-2. different tables OOS R-square ----
TABLE2 <- list()
table2.df <- data.frame()

## (3*) repeat for each portfolio ----
# Same seven series as Table 1. For each: build SOP and constrained-SOP
# forecasts, then five univariate valuation-ratio regressions, and report
# OOS R2 / MSE-F for each forecast against the historical-mean benchmark.
c <- 0
for (id in c(market.names, data.names)) {
  c <- c + 1
  # print(id); print(id.names[c])
  cat('\n')
  cat('#### ', id.names[c], '   \n')
  
  ## 1. read the data ----
  data_nyse <- read.csv(id) %>% 
    as.tbl() %>%
    mutate(month = as.yearmon(month)) %>%
    filter(month >= start.ym) %>% # start from "Dec 1965"
    select(month, r = vwret, P = Index, E = E12, D = D12) %>%
    mutate(DP = D / P, # construct predictors
           PE = P / E,
           EP = E / P,
           EY = E / lag(P), # earnings yield
           DY = D / lag(P), # dividend yield
           Payout = D / E) # payout ratios
  
  ## 2. return decomposition ----
  k = freq * 20 # set a 20-year rolling window (total k periods.)
  
  data_decompose <- data_nyse %>% # also try PD ratio replacing PE.
    mutate(r = r, # cts returns (has already being the log return in row 95)
           gm = log(PE) - lag(log(PE)), # multiple expansion rate
           ge = log(E) - lag(log(E)), # earnings growth rate
           # mu_ge0 = (log(E) - lag(log(E), k)) / k,
           dp = log(1 + DP/freq)) %>% # see note 1.
    # only 1/12 of the annualised dividend is included. > see note 1.
    na.omit() %>% 
    # Self-join E shifted k periods forward so each row sees earnings from k
    # periods back (Ek); mu_ge0 is then the k-period average log earnings growth.
    left_join(select(data_nyse, month, Ek = E) %>% mutate(month = month + k/freq), by = 'month' ) %>% 
    mutate(mu_ge0 = (log(E) - log(Ek)) / k ) 
  
  ## 3. SOP predictions ---- 
  # Sum-of-parts forecast: expected gm (set to 0) + expected ge + dividend term.
  data_pred <- data_decompose %>%
    select(month, r, gm, ge, dp, mu_ge0) %>%
    mutate(mu_gm = 0,
           mu_ge1 = rollmeanr(ge, k, fill = NA_real_), # rolling mean 
           mu_ge2 = c(rep(NA_real_, (k-1)), cummean(ge)[k:length(ge)]), # recursive mean
           mu_ge3 = cummean(ge), # recursive mean from the beginning 
           a_DK1 = rollmeanr(r - dp, k, fill = NA_real_), # methods Eq (14/15) by DK 
           a_DK2 = cummean(r - dp), # methods Eq (14/15) by DK 
           mu_dp = dp,
           mu_sop = mu_gm + mu_ge0 + mu_dp) %>% # the predictor > see note 2.
    mutate(sop_simple = lag(mu_sop), # conditional predictions
           hist_mean = lag(cummean(r)) ) # historical mean predictions
  
  ### 3.2 constrained SOP predictions ----- 
    # Recursive one-step-ahead forecasts: regress returns (net of a rolling ge
    # mean used as the intercept) on lagged dp, with the slope handled via an
    # offset in the glm fit.
    constrained_sop2 <- constrained_sop <- rep(NA_real_, nrow(data_decompose)) 
    for (t in (k+2):nrow(data_decompose)) {
      ## based on the Eq (14/15) in F-SC (2011) 
      x.IS <- data_decompose$dp[1:(t-2)] 
      y.IS <- data_decompose$r[2:(t-1)] 
      intercept <- data_pred$mu_ge1[t-1] #?
      y <- y.IS - intercept 
      data.IS <- data.frame(y, x.IS)
      
      # NOTE(review): model_constrained is fitted but never used below.
      model_constrained <- lm(y ~ x.IS - 1 + offset(1*x.IS), data = data.IS)
      model.IS <- glm(y ~ x.IS - 1, data = data.IS, family = gaussian(link = "identity"), offset = -1*x.IS)
      # mm <- glm.fit(x = x.IS, y = y, offset = -1*x.IS, intercept = F); mm$coefficients
      
      x.new <- data_decompose$dp[t-1] 
      y.pred <- predict.glm(model.IS, newdata = data.frame(x.IS = x.new), type = "response")
      # coef_constrained <- coef(model.IS) - 1 # the true coefficient estimate 
      y.pred_manual <- coef(model.IS) * x.new 
      constrained_sop[t] <- y.pred + intercept # store the prediction
      constrained_sop2[t] <- y.pred_manual + intercept # store the prediction
    }
    data_pred$sop_constrained <- constrained_sop
    data_pred$sop_constrained2 <- constrained_sop2
  
  ## 4. OOS R2 and MSE-F ----
  ### build the function for the bootstrap
  # Compare conditional (`cond`) vs unconditional (`uncond`) forecasts of
  # `actual` (all passed as column names of `data`): returns OOS R2, MAE,
  # MSE-F and cumulative series; optionally bootstraps MSE-F critical values
  # by simulating the predictor `x` as an AR(1).
  Boot_MSE.F <- function(data, actual, cond, uncond, x, critical.values = TRUE, boot.times = 10000) {
    ## clean and reorganise the data
    ports.dt <- data %>%
      select(month, actual = actual, cond = cond, uncond = uncond, x = x)
    
    ports.dt_narm <- ports.dt %>%
      na.omit() %>%
      mutate(error_A = actual - cond,
             error_N = actual - uncond,
             Cuml_MSE_A = cummean(error_A ^ 2),
             Cuml_MSE_N = cummean(error_N ^ 2),
             Cuml_OOS.rsquared = 1 - Cuml_MSE_A / Cuml_MSE_N,
             Cum_SSE = cumsum(error_N ^2 - error_A ^ 2) ) %>% 
      right_join(data.frame(month = seq(na.omit(ports.dt)$month[1], end.ym - 1/12, 1/freq) ), by = 'month') %>% 
      arrange(desc(-month)) # ascending by month (desc of the negated value)
    # ports.dt_narm %>% filter(month > 2009 & month < 2010.5)
    ### for the full (out-of-)sample
    MSE_A <- mean(ports.dt_narm$error_A ^ 2, na.rm = T)
    MSE_N <- mean(ports.dt_narm$error_N ^ 2, na.rm = T)
    OOS_r.squared <- 1 - MSE_A / MSE_N # out-of-sample R square
    MSE_F <- length(na.omit(ports.dt_narm)$month) * (MSE_N - MSE_A) / (MSE_A) 
    MAE_A <- mean(abs(ports.dt_narm$error_A), na.rm = T) %>% round(digits = 6) # Mean absolute error of the conditional model
    print(paste("OOS R Squared: ", round(OOS_r.squared, digits = 4), sep = ""))
    print(paste("MSE-F: ", round(MSE_F, digits = 4), sep = ""))
    
    Cuml_OOS.ts <- ts(ports.dt_narm$Cuml_OOS.rsquared, start = ports.dt_narm$month[1], frequency = freq)
    Cum_SSE.ts <- ts(ports.dt_narm$Cum_SSE, start = ports.dt_narm$month[1], frequency = freq)
    
    ## Bootstrapped MSE-F Stat
    if (critical.values == TRUE) {
      ## get the data series for x and y
      y0 <- ports.dt[is.na(ports.dt$cond), ]$actual
      y1 <- ports.dt[!is.na(ports.dt$cond), ]$actual
      x1 <- as.vector(na.omit(ports.dt$x))
      
      ## full sample estimation for the model
      alpha <- mean(y1)
      u1 <- y1 - alpha
      
      x.lm <- lm(x1 ~ lag(x1))
      mu <- as.numeric(coef(x.lm)[1])
      rho <- as.numeric(coef(x.lm)[2])
      u2 <- as.numeric(x.lm$residuals)
      
      u <- data.frame(index = 1:length(u1), u1, u2) # the dataset storing all the residual info
      
      ### bootstrapping pairs of error terms
      boot.critical <- c()
      for (i in 1:boot.times) { # the bootstrapped times can be modified
        index.boot <- sample(u$index, length(u1), replace = T)
        u.boot <- data.frame()
        # NOTE(review): growing u.boot row-by-row is O(n^2); u[index.boot, ]
        # would build the same frame in one step.
        for (j in index.boot) {
          u.boot <- rbind.data.frame(u.boot, u[j,])
        } # record the bootstrapped error terms
        
        ### reconstruct the simulated x and y
        y1.new <- alpha + u.boot$u1
        x0.new <- sample(x1, 1) # resample a value as the starting value of x
        x1.new <- c(mu + rho * x0.new + u.boot$u2[1])
        for (j in 2:length(y1.new)) {
          x1.new[j] <- mu + x1.new[j-1] * rho + u.boot$u2[j]
        } # simulate SOP values
        
        ### redo the rolling estimation
        y.boot <- c(y0, y1.new)
        x.boot <- c(rep(NA_real_, (length(y0) - 1)), x0.new, x1.new)
        
        data.dt <- data.frame(month = ports.dt$month, x.boot, y.boot) %>%
          as.tbl() %>%
          mutate(conditional = lag(x.boot), # convert to the SOP prediction
                 unconditional = lag(cummean(y.boot))) %>% # convert to the historical mean prediction
          na.omit()
        
        error_N.boot = data.dt$y.boot - data.dt$unconditional
        error_A.boot = data.dt$y.boot - data.dt$conditional
        MSE_N.boot = mean(error_N.boot ^ 2) 
        MSE_A.boot = mean(error_A.boot ^ 2) 
        # NOTE(review): %>% binds tighter than "/", so round() applies to
        # MSE_N.boot alone, not to the whole ratio — confirm intended.
        OOS_r.squared.boot <- 1 - MSE_A.boot / MSE_N.boot %>% round(digits = 6) # out-of-sample R square
        
        ### MSE-F statistic
        # NOTE(review): same precedence caveat — round() applies to (MSE_A.boot)
        # only. Also, OOS_r.squared.boot above is computed but never stored.
        MSE_F.boot <- length(error_N.boot) * (MSE_N.boot - MSE_A.boot) / (MSE_A.boot) %>% round(digits = 6)
        
        boot.critical[i] <- MSE_F.boot
        
        if (i %% (boot.times/10) == 0) {
          timestamp()
          print(paste("SOP", ": ", i %/% (boot.times/100), "%", sep = ""))
        }
      }
      ## store the results
      result <- cbind.data.frame(IS_r.squared = NA_real_,
                                 OOS_r.squared,
                                 MAE_A, # MAE of conditional model
                                 MSE_F,
                                 t(quantile(boot.critical, probs = c(0.90, 0.95, 0.99))),
                                 p.value = mean(boot.critical > MSE_F))
      # d_RMSE = round(d_RMSE, digits = 4),
      # DM_stat = round(DM_test.result$statistic, digits = 4),
      # DM_pval = round(DM_test.result$p.value, digits = 4))
    } else {
      result <- cbind.data.frame(IS_r.squared = NA_real_,
                                 OOS_r.squared,
                                 MAE_A, # MAE of conditional model
                                 MSE_F)
    }
    
    rownames(result) <- "SOP"
    output <- list(result = result, Cuml_OOS.ts = Cuml_OOS.ts, Cum_SSE.ts = Cum_SSE.ts)
    return(output)
  }
  
  ### store the results for the SOP method
  cat("SOP: ")
  sop.result <- Boot_MSE.F(data = data_pred, actual = "r", cond = "sop_simple", uncond = "hist_mean", x = "mu_sop", critical.values = FALSE, boot.times = 3000)
  sop.result$result 
  cat('\n')
  
  cat("SOP_c: ")
  sop_constrained.result <- Boot_MSE.F(data = data_pred, actual = "r", cond = "sop_constrained", uncond = "hist_mean", x = "mu_sop", critical.values = FALSE, boot.times = 3000)
  sop_constrained.result$result
  cat('\n')
  
  # sop_constrained2.result <- Boot_MSE.F(data = data_pred, actual = "r", cond = "sop_constrained2", uncond = "hist_mean", x = "mu_sop", critical.values = FALSE, boot.times = 3000)
  # sop_constrained.result$result
  
  cat('\n')
  # Two stacked panels: cumulative OOS R2 difference and cumulative SSE
  # difference, SOP (solid) vs constrained SOP (blue dotted).
  par(mfrow = c(2,1))
    par(mar = c(3,5,3,2))
  Cuml_OOS.sop.ts <- ts.intersect(sop.result$Cuml_OOS.ts * 100, sop_constrained.result$Cuml_OOS.ts * 100) 
  plot.ts(Cuml_OOS.sop.ts, 
          plot.type = 'single', lty = c(1,3), col = c(1,'blue'), 
          xlab = NULL, ylab = "as %", main = paste(id.names[c], ": Cumulative OOS R Squared Difference - SOP", sep = "")) 
  abline(h = c(0,1), lty = 2, col = 2)
  abline(v = c(2003,2007), lty = 2, col = 2)
  
    par(mar = c(5,5,3,2))
  Cuml_SSE.sop.ts <- ts.intersect(sop.result$Cum_SSE.ts, sop_constrained.result$Cum_SSE.ts) 
  plot.ts(Cuml_SSE.sop.ts, 
          plot.type = 'single', lty = c(1,3), col = c(1,'blue'), 
          ylab = NULL, cex.lab = 0.5, 
          xlab = "An increase in a line indicates better performance of the conditional model\n *The blue dotted line is for the constrained SOP",
          main = paste(id.names[c], ": Cumulative SSE Difference - SOP", sep = "")) 
  abline(h = 0, lty = 2, col = 2)
  abline(v = c(2003,2008), lty = 2, col = 2)
  par(mfrow = c(1,1))
  
  cat('\n') 
  
  ### store all cumulative OOS R2 difference values
  Cuml_all.ts <- sop.result$Cuml_OOS.ts * 100
  Csse_all.ts <- sop.result$Cum_SSE.ts
  
  ## 5. univariate predictive regressions ----
  table2.uni_predictors <- data.frame()
  cat('\n')
  
  # For each valuation ratio: in-sample regression, recursive OOS predictions,
  # and the same OOS R2 / MSE-F statistics (bootstrap optional, off by default).
  for (predictor in ratio_names) {
    ## construct conditional & unconditional predictions
    data_univariate <- data_decompose %>%
      select(month, r, predictor) %>% # tidyselect resolves the string `predictor`
      mutate(hist_mean = lag(cummean(r)),
             x = lag(get(predictor)) %>% log) ## convert to log predictors
      # data_univariate[[predictor]] <- log(data_univariate[[predictor]])
      # data_univariate[["x"]] <- log(data_univariate[["x"]])
    
    ## IS R2
    lm.IS <- lm(r ~ x, data = data_univariate)
    IS_r.squared <- summary(lm.IS)$r.squared # in-sample r squared
    IS_pval <- summary(lm.IS)$coefficients[2,4] # the p-value from F-statistic
    
    ## OOS recursive window predictions
    k <- k # the starting in-sample data (no-op: k already holds the window length)
    con_pred = rep(NA_real_, nrow(data_univariate))
    for (t in (k+2):nrow(data_univariate)) {
      x.IS <- data_univariate$x[2:(t-1)]
      y.IS <- data_univariate$r[2:(t-1)]
      reg.IS <- lm(y.IS ~ x.IS)
      
      x.new <- data_univariate$x[t]
      y.pred <- predict(reg.IS, newdata = data.frame(x.IS = x.new))
      con_pred[t] <- y.pred # store the prediction
    }
    data_univariate$con_pred <- con_pred
    data_univariate
    
    ## Stat and Bootstrap
    # These plain variables mirror Boot_MSE.F's arguments; the block below is
    # an inlined copy of its logic for the univariate predictors.
    data = data_univariate
    actual = "r"
    cond = "con_pred"
    uncond = "hist_mean"
    critical.values = FALSE # decide whether the bootstrapped critical value is calculated > Note 3.
    boot.times = 1000 * 10
    
    { 
      cat('\n')
      cat(paste(predictor, ": ")) 
      ## get OOS R2 & MSE-F
      ports.dt <- data %>%
        select(month, actual = actual, cond = cond, uncond = uncond, predictor) %>%
        rename(x = predictor)
      
      ports.dt_narm <- ports.dt %>%
      na.omit() %>%
      mutate(error_A = actual - cond,
             error_N = actual - uncond,
             Cuml_MSE_A = cummean(error_A ^ 2),
             Cuml_MSE_N = cummean(error_N ^ 2),
             Cuml_OOS.rsquared = 1 - Cuml_MSE_A / Cuml_MSE_N,
             Cum_SSE = cumsum(error_N ^2 - error_A ^ 2) ) %>% 
      right_join(data.frame(month = seq(na.omit(ports.dt)$month[1], end.ym - 1/12, 1/freq) ), by = 'month') %>% 
      arrange(desc(-month)) # ascending by month
      
      ### for the full (out-of-)sample
      MSE_A <- mean(ports.dt_narm$error_A ^ 2, na.rm = T)
      MSE_N <- mean(ports.dt_narm$error_N ^ 2, na.rm = T)
      OOS_r.squared <- 1 - MSE_A / MSE_N # out-of-sample R square
      MSE_F <- length(na.omit(ports.dt_narm)$month) * (MSE_N - MSE_A) / (MSE_A) 
      MAE_A <- mean(abs(ports.dt_narm$error_A), na.rm = T) %>% round(digits = 6) # Mean absolute error of the conditional model
      print(paste("IS R Squared: ", round(IS_r.squared, digits = 4), sep = ""))
      print(paste("OOS R Squared: ", round(OOS_r.squared, digits = 4), sep = ""))
      print(paste("MSE-F: ", round(MSE_F, digits = 4), sep = ""))
      
      ### combine the cumulative OOS R2 difference of other predictors & cum SSE
      Cuml_pred.ts <- ts(ports.dt_narm$Cuml_OOS.rsquared * 100, start = ports.dt_narm$month[1], frequency = freq)
      Cuml_all.ts <- ts.union(Cuml_all.ts, Cuml_pred.ts)
      Cum_pred.sse <- ts(ports.dt_narm$Cum_SSE, start = ports.dt_narm$month[1], frequency = freq)
      Csse_all.ts <- ts.union(Csse_all.ts, Cum_pred.sse)
      
      ## prepare the bootstrap
      if (critical.values == TRUE) {
        y0 <- ports.dt$actual[1]
        y1 <- ports.dt$actual[-1]
        x1 <- ports.dt$x
        
        ## full sample estimation for the model
        alpha <- mean(y1)
        u1 <- y1 - alpha
        
        x.lm <- lm(x1 ~ lag(x1))
        mu <- as.numeric(coef(x.lm)[1])
        rho <- as.numeric(coef(x.lm)[2])
        u2 <- as.numeric(x.lm$residuals)
        
        u <- data.frame(index = 1:length(u1), u1, u2) # the dataset storing all the residual info
        
        ### bootstrap pairs of error terms
        boot.critical <- c()
        for (i in 1:boot.times) { # the bootstrapped times can be modified
          index.boot <- sample(u$index, length(u1), replace = T)
          u.boot <- data.frame()
          for (j in index.boot) {
            u.boot <- rbind.data.frame(u.boot, u[j,])
          } # record the bootstrapped error terms
          
          ### reconstruct the simulated x and y
          y1.new <- alpha + u.boot$u1
          x0.new <- sample(x1, 1) # resample a value as the starting value of x
          x1.new <- c(mu + rho * x0.new + u.boot$u2[1])
          for (j in 2:length(y1.new)) {
            x1.new[j] <- mu + x1.new[j-1] * rho + u.boot$u2[j]
          } # simulate predictors
          
          ### redo the rolling estimation
          y.boot <- c(y0, y1.new)
          x.boot <- c(x0.new, x1.new)
          
          data.dt <- as.tbl(data.frame(month = ports.dt$month, x.boot, y.boot, x = lag(x.boot)))
          con_pred = rep(NA_real_, nrow(data.dt))
          for (t in (k+2):nrow(data.dt)) {
            x.IS <- data.dt$x[2:(t-1)]
            y.IS <- data.dt$y.boot[2:(t-1)]
            reg.IS <- lm(y.IS ~ x.IS)
            
            x.new <- data.dt$x[t]
            y.pred <- predict(reg.IS, newdata = data.frame(x.IS = x.new))
            con_pred[t] <- y.pred
          }
          data.dt$conditional <- con_pred
          data.dt <- data.dt %>%
            mutate(unconditional = lag(cummean(y.boot))) %>%
            na.omit()
          
          error_N.boot = data.dt$y.boot - data.dt$unconditional
          error_A.boot = data.dt$y.boot - data.dt$conditional
          MSE_N.boot = mean(error_N.boot ^ 2)
          MSE_A.boot = mean(error_A.boot ^ 2)
          OOS_r.squared.boot <- 1 - MSE_A.boot / MSE_N.boot # out-of-sample R square
          
          ### MSE-F statistic
          MSE_F.boot <- length(error_N.boot) * (MSE_N.boot - MSE_A.boot) / (MSE_A.boot)
          
          boot.critical[i] <- MSE_F.boot
          
          if (i %% (boot.times/10) == 0) {
            timestamp()
            print(paste(predictor, ": ", i %/% (boot.times/100), "%", sep = ""))
          }
        }
        ## store the results
        result <- cbind.data.frame(IS_r.squared, 
                                   OOS_r.squared,
                                   MAE_A, # MAE of conditional model
                                   MSE_F,
                                   t(quantile(boot.critical, probs = c(0.90, 0.95, 0.99))),
                                   p.value = mean(boot.critical > MSE_F))
        # d_RMSE = round(d_RMSE, digits = 4),
        # DM_stat = round(DM_test.result$statistic, digits = 4),
        # DM_pval = round(DM_test.result$p.value, digits = 4))
      } else {
        result <- cbind.data.frame(IS_r.squared, 
                                   OOS_r.squared,
                                   MAE_A, # MAE of conditional model
                                   MSE_F)
      }
      rownames(result) <- predictor
      
      result 
      cat('\n')
    }
    
    ## store the results for all predictors
    table2.uni_predictors <- rbind.data.frame(table2.uni_predictors, result)
  }
  table2.uni_predictors
  
  ## 6. statistics summary ----
  table2 <- rbind.data.frame(table2.uni_predictors, "SOP" = sop.result$result, "SOP_c" = sop_constrained.result$result)
  if ("p.value" %in% colnames(table2)) {
    table2$star <- ifelse(table2$p.value <= 0.01, "***", ifelse(table2$p.value <= 0.05, "**", ifelse(table2$p.value <= 0.1, "*", "")))
  }
  # statistical significance from McCracken (2007)
  table2$McCracken <- ifelse(table2$MSE_F >= 3.838, "***", ifelse(table2$MSE_F >= 1.599, "**", ifelse(table2$MSE_F >= 0.685, "*", "")))
  
  table2
  TABLE2[[id.names[c]]] <- table2
  
  table2$rowname <- rownames(table2)
  table2$portname <- id.names[c]
  table2.df <- rbind.data.frame(table2.df, table2)
  
  ## 7. cumulative OOS R2 difference merged dataset ----
  # 3x3 grids of the cumulative OOS R2 difference (from 1990) and the
  # cumulative SSE difference for SOP plus the five ratio predictors.
  colnames(Cuml_all.ts) <- c("SOP", ratio_names)
  cat('\n') 
  par(mfrow = c(3,3))
  for (method in colnames(Cuml_all.ts)) {
    par(mar = c(2, 2.5, 2, 0.5))
    plot.ts(window(Cuml_all.ts[, method], start = 1990), xlab = NULL, lty = 2, ylab = NULL, main = method, cex.main = 0.8, xaxt = "n", las = 2)
    axis(1, seq(1990, 2020, by = 10), cex.axis = 0.8)
  }
  
  colnames(Csse_all.ts) <- c("SOP", ratio_names)
  par(mfrow = c(3,3))
  for (method in colnames(Csse_all.ts)) {
    par(mar = c(2, 2.5, 2, 0.5))
    plot.ts(Csse_all.ts[, method], xlab = NULL, lty = 2, ylab = NULL, main = method, cex.main = 0.8, xaxt = "n", las = 2)
    axis(1, seq(1990, 2020, by = 10), cex.axis = 0.8)
  }
  par(mfrow = c(1,1), mar = c(5, 4, 4, 2) + 0.1)
  cat('\n')
}

Market

SOP: [1] “OOS R Squared: 0.0286” [1] “MSE-F: 3.9223”

SOP_c: [1] “OOS R Squared: 0.0195” [1] “MSE-F: 2.6272”

DP : [1] “IS R Squared: 0.0285” [1] “OOS R Squared: -0.0421” [1] “MSE-F: -5.3349”

PE : [1] “IS R Squared: 0.0258” [1] “OOS R Squared: 0.0037” [1] “MSE-F: 0.4869”

EY : [1] “IS R Squared: 0.0259” [1] “OOS R Squared: -0.0066” [1] “MSE-F: -0.8646”

DY : [1] “IS R Squared: 0.0286” [1] “OOS R Squared: -0.0585” [1] “MSE-F: -7.2952”

Payout : [1] “IS R Squared: 7e-04” [1] “OOS R Squared: -0.0374” [1] “MSE-F: -4.7629”

BH

SOP: [1] “OOS R Squared: -0.0181” [1] “MSE-F: -2.3841”

SOP_c: [1] “OOS R Squared: 1e-04” [1] “MSE-F: 0.016”

DP : [1] “IS R Squared: 0.0224” [1] “OOS R Squared: -0.0383” [1] “MSE-F: -4.9029”

PE : [1] “IS R Squared: 0.0165” [1] “OOS R Squared: 0.0133” [1] “MSE-F: 1.7919”

EY : [1] “IS R Squared: 0.0188” [1] “OOS R Squared: 0.0163” [1] “MSE-F: 2.1984”

DY : [1] “IS R Squared: 0.0265” [1] “OOS R Squared: -0.0351” [1] “MSE-F: -4.5103”

Payout : [1] “IS R Squared: 4e-04” [1] “OOS R Squared: -0.0063” [1] “MSE-F: -0.8287”

BL

SOP: [1] “OOS R Squared: 0.0356” [1] “MSE-F: 4.9531”

SOP_c: [1] “OOS R Squared: 0.0259” [1] “MSE-F: 3.534”

DP : [1] “IS R Squared: 0.0315” [1] “OOS R Squared: -0.0119” [1] “MSE-F: -1.5624”

PE : [1] “IS R Squared: 0.0141” [1] “OOS R Squared: -0.0039” [1] “MSE-F: -0.5186”

EY : [1] “IS R Squared: 0.0144” [1] “OOS R Squared: -0.0091” [1] “MSE-F: -1.201”

DY : [1] “IS R Squared: 0.0324” [1] “OOS R Squared: -0.0216” [1] “MSE-F: -2.809”

Payout : [1] “IS R Squared: 6e-04” [1] “OOS R Squared: -0.0184” [1] “MSE-F: -2.4001”

BM

SOP: [1] “OOS R Squared: 0.0018” [1] “MSE-F: 0.244”

SOP_c: [1] “OOS R Squared: 0.0043” [1] “MSE-F: 0.5704”

DP : [1] “IS R Squared: 0.0114” [1] “OOS R Squared: -0.0433” [1] “MSE-F: -5.5247”

PE : [1] “IS R Squared: 0.0214” [1] “OOS R Squared: -0.0159” [1] “MSE-F: -2.082”

EY : [1] “IS R Squared: 0.0252” [1] “OOS R Squared: -0.0219” [1] “MSE-F: -2.8535”

DY : [1] “IS R Squared: 0.0149” [1] “OOS R Squared: -0.0529” [1] “MSE-F: -6.6854”

Payout : [1] “IS R Squared: 0.0033” [1] “OOS R Squared: -0.019” [1] “MSE-F: -2.4759”

SH

SOP: [1] “OOS R Squared: -0.044” [1] “MSE-F: -5.6439”

SOP_c: [1] “OOS R Squared: -0.02” [1] “MSE-F: -2.6139”

DP : [1] “IS R Squared: 0.0096” [1] “OOS R Squared: -0.0423” [1] “MSE-F: -5.3982”

PE : [1] “IS R Squared: 0.0088” [1] “OOS R Squared: -0.0245” [1] “MSE-F: -3.1797”

EY : [1] “IS R Squared: 0.0106” [1] “OOS R Squared: -0.0191” [1] “MSE-F: -2.4901”

DY : [1] “IS R Squared: 0.0114” [1] “OOS R Squared: -0.0483” [1] “MSE-F: -6.1317”

Payout : [1] “IS R Squared: 0” [1] “OOS R Squared: -0.0313” [1] “MSE-F: -4.0388”

SL

SOP: [1] “OOS R Squared: -0.0449” [1] “MSE-F: -5.7528”

SOP_c: [1] “OOS R Squared: -0.0408” [1] “MSE-F: -5.2157”

DP : [1] “IS R Squared: 0.0364” [1] “OOS R Squared: 0.013” [1] “MSE-F: 1.7514”

PE : [1] “IS R Squared: 0.0388” [1] “OOS R Squared: 0.0168” [1] “MSE-F: 2.2735”

EY : [1] “IS R Squared: 0.0357” [1] “OOS R Squared: 0.0124” [1] “MSE-F: 1.6714”

DY : [1] “IS R Squared: 0.0341” [1] “OOS R Squared: 0.0088” [1] “MSE-F: 1.1826”

Payout : [1] “IS R Squared: 0.0024” [1] “OOS R Squared: -0.0105” [1] “MSE-F: -1.3849”

SM

SOP: [1] “OOS R Squared: -0.0311” [1] “MSE-F: -4.0448”

SOP_c: [1] “OOS R Squared: -0.0201” [1] “MSE-F: -2.622”

DP : [1] “IS R Squared: 0.0118” [1] “OOS R Squared: -0.0392” [1] “MSE-F: -5.0128”

PE : [1] “IS R Squared: 0.0178” [1] “OOS R Squared: -0.0052” [1] “MSE-F: -0.691”

EY : [1] “IS R Squared: 0.0182” [1] “OOS R Squared: -0.0074” [1] “MSE-F: -0.9803”

DY : [1] “IS R Squared: 0.012” [1] “OOS R Squared: -0.0502” [1] “MSE-F: -6.3584”

Payout : [1] “IS R Squared: 0” [1] “OOS R Squared: -0.0204” [1] “MSE-F: -2.657”

Table 2 - Forecasts of portfolio returns

This table demonstrates the in-sample and out-of-sample R-squares for the market and six size and book-to-market equity ratio sorted portfolios from predictive regressions and the Sum-of-the-Parts method. IS R-squares are estimated using the whole sample period and the OOS R-squares are calculated by comparing the forecast error of the model against that of the historical mean model. The full sample period runs from Feb 1966 to December 2019 and the IS period is set to be 20 years, with forecasts beginning in Feb 1986. The MSE-F statistics are calculated to test the hypothesis \(H_0: \text{out-of-sample R-squares} = 0\) vs \(H_1: \text{out-of-sample R-squares} \neq 0\).

Predictors here are all in log terms.

# Render Table 2 (stacked per-portfolio results) as a gt table,
# grouping rows by portfolio and labelling them by predictor/method.
table2.subtitle <- paste0("OOS ", freq_name(freq = freq), " data starts from ",
                          first(data_decompose$month) + k/freq,
                          " and ends in ", last(data_decompose$month), ".")
table2.df %>%
  gt(rowname_col = "rowname", groupname_col = "portname") %>%
  tab_header(title = "Table 2 - Forecasts of portfolio returns",
             subtitle = table2.subtitle) %>%
  fmt_number(columns = 1:4, decimals = 6, suffixing = TRUE)
Table 2 - Forecasts of portfolio returns
OOS quarterly data starts from Mar 1986 and ends in Dec 2019.
IS_r.squared OOS_r.squared MAE_A MSE_F McCracken
Market
DP 0.028490 −0.042118 0.062166 −5.334913
PE 0.025835 0.003675 0.060259 0.486856
EY 0.025852 −0.006593 0.060855 −0.864610
DY 0.028605 −0.058500 0.062901 −7.295240
Payout 0.000731 −0.037433 0.059298 −4.762883
SOP NA 0.028646 0.057615 3.922281 ***
SOP_c NA 0.019514 0.057035 2.627157 **
BH
DP 0.022398 −0.038275 0.064103 −4.902889
PE 0.016491 0.013294 0.059548 1.791903 **
EY 0.018779 0.016260 0.059614 2.198365 **
DY 0.026481 −0.035102 0.064150 −4.510304
Payout 0.000443 −0.006270 0.059530 −0.828665
SOP NA −0.018114 0.059893 −2.384101
SOP_c NA 0.000120 0.059056 0.015952
BL
DP 0.031507 −0.011887 0.066818 −1.562427
PE 0.014067 −0.003915 0.064973 −0.518615
EY 0.014378 −0.009112 0.065159 −1.200972
DY 0.032397 −0.021576 0.066966 −2.809004
Payout 0.000559 −0.018378 0.064658 −2.400101
SOP NA 0.035646 0.062882 4.953134 ***
SOP_c NA 0.025884 0.062564 3.534031 **
BM
DP 0.011432 −0.043340 0.060502 −5.524733
PE 0.021390 −0.015903 0.059851 −2.082031
EY 0.025247 −0.021925 0.060433 −2.853495
DY 0.014876 −0.052927 0.061283 −6.685414
Payout 0.003311 −0.018969 0.058235 −2.475918
SOP NA 0.001817 0.056794 0.243981
SOP_c NA 0.004271 0.055939 0.570419
SH
DP 0.009618 −0.042305 0.084098 −5.398179
PE 0.008807 −0.024493 0.084437 −3.179703
EY 0.010580 −0.019080 0.084432 −2.490133
DY 0.011447 −0.048331 0.084455 −6.131691
Payout 0.000001 −0.031318 0.084133 −4.038838
SOP NA −0.043970 0.084479 −5.643860
SOP_c NA −0.020047 0.083880 −2.613894
SL
DP 0.036424 0.012997 0.113561 1.751423 **
PE 0.038803 0.016806 0.112345 2.273459 **
EY 0.035693 0.012411 0.112565 1.671404 **
DY 0.034129 0.008813 0.113524 1.182586 *
Payout 0.002419 −0.010522 0.114480 −1.384887
SOP NA −0.044857 0.115354 −5.752802
SOP_c NA −0.040817 0.115593 −5.215748
SM
DP 0.011836 −0.039167 0.084040 −5.012847
PE 0.017771 −0.005223 0.082026 −0.691046
EY 0.018206 −0.007426 0.082511 −0.980313
DY 0.012049 −0.050208 0.084126 −6.358374
Payout 0.000003 −0.020385 0.081478 −2.657006
SOP NA −0.031125 0.081848 −4.044810
SOP_c NA −0.020111 0.081605 −2.622009

Figure 4 - Quarterly return predictions

Here I only present the quarterly predictions of the historical mean model, the SOP method and the predictive regressions based on the dividend-price ratio and the earnings-price ratio.

Return Predictions

# TABLE-3 optimal weights and CEGs ----
# Build, for every portfolio, the full set of return predictions (historical
# mean, SOP, and each univariate predictive regression); results feed the
# trading-strategy tables below via the `all_predictions` list.
all_predictions <- list() ## store all the prediction values
# kpss.ts <- kpss.diff <- as.data.frame(matrix(nrow = 8, ncol = 7), col.names = c(market.names, data.names))
c <- 0

for (id in c(market.names, data.names)) {
  c <- c + 1
  # print(id); print(id.names[c])
  cat('\n')
  cat('#### ', id.names[c], '   \n')
  
  ## construct SOP predictions
  ## load this portfolio's series and build the valuation ratios
  ## (assumes the CSV has columns month, vwret, Index, E12, D12 -- per earlier chunks)
  data_nyse <- read.csv(id) %>%
    as.tbl() %>%
    mutate(month = as.yearmon(month)) %>%
    filter(month >= start.ym) %>% # start from "Jan 1966"
    select(month, r = vwret, P = Index, E = E12, D = D12) %>%
    mutate(DP = D / P,
           PE = P / E,
           EP = E / P,
           EY = E / lag(P), # earnings yield
           DY = D / lag(P), # dividend yield
           Payout = D / E) # payout ratios
  
  ## 2. return decomposition ----
  k = freq * 20 # set a 20-year rolling window (k observations at this frequency)
  
  data_decompose <- data_nyse %>% # also try PD ratio replacing PE.
    mutate(r = r, # cts returns (has already being the log return in row 95)
           gm = log(PE) - lag(log(PE)), # multiple expansion rate
           ge = log(E) - lag(log(E)), # earnings growth rate
           # mu_ge0 = (log(E) - lag(log(E), k)) / k,
           dp = log(1 + DP/freq)) %>% # see note 1.
    # only 1/12 of the annualised dividend is included. > see note 1.
    na.omit() %>% 
    # join earnings from k periods earlier (Ek) by shifting the month key
    left_join(select(data_nyse, month, Ek = E) %>% mutate(month = month + k/freq), by = 'month' ) %>% 
    mutate(mu_ge0 = (log(E) - log(Ek)) / k ) # k-period average earnings growth
  
  ## 3. SOP predictions ---- 
  ## sum-of-the-parts forecast: expected multiple growth (set to 0) +
  ## expected earnings growth + dividend yield component
  data_pred <- data_decompose %>%
    select(month, r, gm, ge, dp, mu_ge0) %>%
    mutate(mu_gm = 0,
           mu_ge1 = rollmeanr(ge, k, fill = NA_real_), # rolling mean 
           mu_ge2 = c(rep(NA_real_, (k-1)), cummean(ge)[k:length(ge)]), # recursive mean
           mu_ge3 = cummean(ge), # recursive mean from the beginning 
           a_DK1 = rollmeanr(r - dp, k, fill = NA_real_), # methods Eq (14/15) by DK 
           a_DK2 = cummean(r - dp), # methods Eq (14/15) by DK 
           mu_dp = dp,
           mu_sop = mu_gm + mu_ge0 + mu_dp) %>% # the predictor > see note 2.
    mutate(sop_simple = lag(mu_sop), # conditional predictions
           hist_mean = lag(cummean(r)) ) # historical mean predictions
  
  ## pad to a regular month grid up to end.ym so later joins align by month
  all_predictions.dt <- data_pred %>% 
    right_join(data.frame(month = seq(data_pred$month[1], end.ym - 1/12, 1/freq) ), by = 'month') %>% 
    arrange(desc(-month)) %>% 
    select(month, r, hist_mean, sop_simple) # store the prediction results
  
  ## construct univariate predictions
  for (predictor in ratio_names) {
    ## construct conditional & unconditional predictions
    ## x = lagged log of the predictor ratio (so forecasts only use past data)
    data_univariate <- data_decompose %>%
      select(month, r, predictor) %>%
      mutate(x = lag(get(predictor)) %>% log) %>%
      select(-predictor)
    
    ## IS R2: full-sample predictive regression
    lm.IS <- lm(r ~ x, data = data_univariate)
    IS_r.squared <- summary(lm.IS)$r.squared # in-sample r squared
    
    ## OOS recursive window predictions
    k <- k # the starting in-sample data (no-op; k was set above to freq * 20)
    con_pred = rep(NA_real_, nrow(data_univariate))
    for (t in (k+2):nrow(data_univariate)) {
      ## refit the regression on all data up to t-1 (expanding window)
      x.IS <- data_univariate$x[2:(t-1)]
      y.IS <- data_univariate$r[2:(t-1)]
      reg.IS <- lm(y.IS ~ x.IS)
      
      x.new <- data_univariate$x[t]
      y.pred <- predict(reg.IS, newdata = data.frame(x.IS = x.new))
      con_pred[t] <- y.pred # store the prediction
    }
    data_univariate[[predictor]] <- con_pred
    data_univariate # get the univariate predictions
    
    all_predictions.dt <- all_predictions.dt %>% # combine into all other estimations
      left_join(select(data_univariate, month, predictor), by = "month")
  }
  
  ## store all the predictions generated
  all_predictions[[id.names[c]]] <- na.omit(all_predictions.dt)
  names(all_predictions.dt)
  
 ## show the performance of variance predictions
  cat('\n')
  
  ## plot a subset of the prediction series from the OOS start onward
  p <- c("hist_mean", "sop_simple", "DP", "PE")
  # all_predictions.ts <- na.omit(all_predictions.dt)
  all_predictions.ts <- ts(all_predictions.dt[, -(1:2)], start = all_predictions.dt$month[1], frequency = freq) %>% 
    window(start = as.numeric(start.ym + k/freq) )
  plot.ts(all_predictions.ts[, p], plot.type = "single",  col = 1:length(p), xlab = NULL, ylab = paste(str_to_title(freq_name(freq = freq)), " Returns", sep = ""))
  title(main = paste(id.names[c], ": ", freq_name(freq = freq), " return predictions", sep = ""))
  legend(xpd = T, inset = -0.35, "bottom", legend = p, col = 1:length(p), lty = 1, horiz = T, cex = 0.8, bty = "n", lwd = 2)
  cat('\n')
}

Market

BH

BL

BM

SH

SL

SM

Figure 5 - Trading Performance (with no trading restrictions)

  • Markowitz optimal weight on the risky portfolio using the SOP method and the historical mean model.
  • Markowitz optimal weight on the risky portfolio using all models.
  • Out-of-sample performance of the trading strategies - the SOP method, the historical mean model and the buy-and-hold strategy - with no trading restrictions.

Trading Performance

## 8. calculate the CEGs ----
## For each portfolio: turn every model's return forecast into Markowitz
## mean-variance weights, trade the risky portfolio against the risk-free
## rate, and record certainty equivalent returns/gains (Table 3) and Sharpe
## ratios/gains (Table 4).
TABLE3 <- list()
TABLE4 <- list()

table3.df <- data.frame()
table4.df <- data.frame()

##
risk.coef <- 3 # the risk-aversion coefficient
for (id in names(all_predictions)) {
  all_predictions.dt <- all_predictions[[id]] %>% # read corresponding data
    right_join(data.frame(month = seq(head(.$month, 1), tail(.$month, 1), 1/freq) ), by = 'month') %>% 
    arrange(desc(-month))
  cat('\n')
  cat('#### ', id, '   \n') 
  
  ## expanding-window sample variance, placed at t+1 so the estimate used at
  ## time t+1 only relies on returns observed up to time t
  var.s <- rep(NA_real_, length(all_predictions.dt$r))
  for (t in 1:(length(var.s) - 1)) {
    var.s[t+1] <- var(all_predictions.dt$r[1:t], na.rm = TRUE) # calculate variance estimation
  }
  all_predictions.dt$var.s <- var.s # historical sample variance
  all_predictions.dt <- all_predictions.dt %>%
    left_join(RF, by = "month") %>%
    rename(Rfree = t30ret) %>% # include the risk-free rate
    mutate(Rfree = lag(Rfree)) # choose the lag of that rate
  
  ## obtain the optimal weights on risky portfolios
  ## w = (E[R] - Rf) / (gamma * Var); forecasts are log returns, hence exp()-1
  weights_stock <- all_predictions.dt
  for (mu in c("r", "hist_mean", "sop_simple", ratio_names)) {
    weights_stock[[mu]] <- (exp(weights_stock[[mu]]) - 1 - weights_stock[["Rfree"]]) / (risk.coef * weights_stock[["var.s"]])
  } ## calculate the optimal weights
  
  cat('\n')
  ## plot the weights: SOP (solid) vs historical mean (dotted)
  plot.ts(ts(weights_stock$sop_simple, start = weights_stock$month[1], frequency = freq) %>% window(start = 1986), xlab = NULL, ylab = NULL, main = substitute(paste("The optimal weights on the ", name, " portfolio with ", gamma, " = ", g, sep = ""), list(name = id, g = risk.coef)))
  lines(ts(weights_stock$hist_mean, start = weights_stock$month[1], frequency = freq), col = 2, lty = 3)
  abline(h = c(0, 1, 1.5), col = "grey50", lty = 3) # fixed color name (was "greY50")
  legend("bottomright", legend = c("sop_simple", "hist_mean"), col = 1:2, lty = c(1,3), cex = 0.8)
  
  ## plot the weights of all models together
  par(mar = c(5, 4, 3, 2))
  plot.ts(ts(weights_stock[c("hist_mean", "sop_simple", ratio_names)],
             start = weights_stock$month[1], frequency = freq) %>% window(start = 1986),
          plot.type = "single", col = 1:length(c("hist_mean", "sop_simple", ratio_names)),
          ylim = c(-1, 3), xlab = NULL, ylab = NULL)
  abline(h = c(0, 1, 1.5), lty = 2, col = "grey50")
  title(main = substitute(paste("The optimal weight on the ", name, " portfolio with ", gamma, " = ", g, sep = ""),
                          list(name = id, g = risk.coef)))
  ## fixed legend label typo: was "sop_simpel", which did not match the
  ## plotted series name "sop_simple"
  legend(xpd = TRUE, "bottom", inset = -0.28, lty = 1, lwd = 2, nc = 4, cex = 0.8, bty = "n",
         legend = c("hist_mean", "sop_simple", ratio_names),
         col = 1:length(c("hist_mean", "sop_simple", ratio_names)))
  
  ## 8.1 calculate OOS trading performance ----
  ## using upper and lower boundaries
  rp <- all_predictions.dt %>%
    select(month, r, Rfree) %>%
    mutate(r = exp(r) - 1) # change from the log return back to the simple return
  limit.opt <- FALSE # the option to set limit or not
  limit.max <- 1.0 # the upper bound of the portfolio weight
  limit.min <- 0 # the lower bound of the portfolio weight
  for (mu in c("hist_mean", "sop_simple", ratio_names)) {
    w <- weights_stock[[mu]]
    if (limit.opt == TRUE) { # whether to pose a upper-lower bound
      w[w > limit.max & !is.na(w)] <- limit.max
      w[w < limit.min & !is.na(w)] <- limit.min
    }
    rp[[mu]] <- w * rp$r + (1 - w) * rp$Rfree # calculate the portfolio returns
  } # 'rp' has the risky + risk-free trading portfolio returns
  table3.mean <- apply(na.omit(rp[-1]), 2, mean)
  table3.var <- apply(na.omit(rp[-1]), 2, var)
  table3.cer <- table3.mean - 1/2 * risk.coef * table3.var # certainty equivalent returns
  table3.dt <- cbind.data.frame(mean = table3.mean, 
                                variance = table3.var, 
                                CERs = table3.cer,
                                CEGs_annualised = (table3.cer - table3.cer["hist_mean"]) * freq)
  table3.dt
  TABLE3[[id]] <- table3.dt
  
  ## long-format copy for the stacked gt table below
  table3.dt <- round(table3.dt, digits = 6)
  table3.dt$rowname <- rownames(table3.dt)
  table3.dt$portname <- id
  table3.df <- rbind.data.frame(table3.df, table3.dt) # for forming tables
  
  ## simulate cumulative wealth of buy-and-hold vs the two strategies
  pfm <- na.omit(rp) %>% # simulate its performance
    mutate(buyhold = cumprod(1 + r),
           index.hm = cumprod(1 + hist_mean),
           index.sop = cumprod(1 + sop_simple)) %>% 
    right_join(data.frame(month = seq(rp$month[1], tail(rp$month, 1), 1/freq) ), by = 'month') %>% 
    arrange(desc(-month)) %>% 
    select(month, buyhold, index.hm, index.sop)
  pfm.ts <- ts(pfm[-1], start = pfm$month[1], frequency = freq)
  plot.ts(pfm.ts, plot.type = "single", col = 1:3, log = "y", lty = c(1, 2, 2), xlab = NULL, ylab = "Performance")
  apply(pfm.ts, 2, max)
  # text(x = end(time(pfm.ts)), y = apply(pfm.ts, 2, max), labels = round(apply(pfm.ts, 2, max), digits = 2), cex = 0.5, col = 1:3, pos = 4)
  legend("topleft", legend = colnames(pfm.ts), lty = c(1,2,3), col = 1:3, cex = 0.8)
  if (limit.opt == TRUE) {
    title(main = substitute(paste(id, " (", limit.min, " <= ", omega, " <= ", limit.max, ")", sep = ""),
                            list(id = id, limit.min = limit.min, limit.max = limit.max)))
  } else {
    title(main = paste(id, " (no restrictions)", sep = ""))
  }
  
  ## TABLE-4 Sharpe ratio and Sharpe ratio gains----
  excess_returns <- rp # store the excess returns
  for (mu in c("r", "hist_mean", "sop_simple", ratio_names)) {
    excess_returns[[mu]] <- excess_returns[[mu]] - excess_returns[["Rfree"]]
  } # constructing the excess returns of the portfolio(s)
  names(excess_returns)
  table4.mean <- apply(na.omit(excess_returns)[-1], 2, mean)
  table4.sd <- apply(na.omit(excess_returns)[-1], 2, sd) # variance estimation is the same as in the CEGs. 
  table4.sr <- table4.mean / table4.sd # get the Sharpe ratio
  table4.dt <- cbind.data.frame(mean = table4.mean,
                                sd = table4.sd,
                                SR = table4.sr,
                                SR_annualised = table4.sr * sqrt(freq),
                                SRG_annualised = (table4.sr - table4.sr["hist_mean"]) * sqrt(freq))
  table4.dt
  TABLE4[[id]] <- table4.dt
  
  ## long-format copy for the stacked gt table below
  table4.dt <- round(table4.dt, digits = 6)
  table4.dt$rowname <- rownames(table4.dt)
  table4.dt$portname <- id
  table4.df <- rbind.data.frame(table4.df, table4.dt) 
  
  cat('\n')
}

Market

BH

BL

BM

SH

SL

SM

Table 3 - Certainty equivalent gains

Trading Strategies: certainty equivalent gains

This table shows the out-of-sample portfolio choice results at quarterly frequencies from predictive regressions and the SOP method. The trading strategy for each portfolio is designed by optimally allocating funds between the risk-free asset and the corresponding risky portfolio. The certainty equivalent return is \(\overline{rp} - \frac{1}{2} \gamma \hat{\sigma}_{rp}^{2}\) with a risk-aversion coefficient \(\gamma = 3\). The annualised certainty equivalent gain (in percentage) is the quarterly certainty equivalent gain multiplied by the corresponding frequency (e.g. 12 for monthly data).

# Table 3: reshape the stacked CEG results (one row per strategy x portfolio)
# into a 6-strategy by 7-portfolio matrix and render it with gt().
dt <- table3.df %>%
  filter(rowname %in% c(ratio_names, "sop_simple")) %>%
  select(CEGs_annualised, rowname, portname)

# column-major fill: table3.df is stacked portfolio-by-portfolio, so each
# column of the matrix holds one portfolio's six strategies
ceg.wide <- as.data.frame(matrix(dt$CEGs_annualised, byrow = FALSE, nrow = 6, ncol = 7))
colnames(ceg.wide) <- unique(dt$portname)

ceg.wide %>%
  mutate(Variable = unique(dt$rowname)) %>%
  as.tbl() %>%
  select(Variable, unique(dt$portname)) %>%
  gt(rowname_col = "Variable") %>%
  tab_header(title = "Table 3 - Trading Strategies: certainty equivalent gains",
             subtitle = paste0(str_to_title(freq_name(freq = freq)), " data starts from ",
                               first(data_decompose$month) + k/freq, " and ends in ",
                               last(data_decompose$month), ".")) %>%
  fmt_percent(columns = 2:8, decimals = 2)
Table 3 - Trading Strategies: certainty equivalent gains
Quarterly data starts from Mar 1986 and ends in Dec 2019.
Market BH BL BM SH SL SM
sop_simple 2.58% 72.51% 2.49% 0.43% −1.25% −0.74% −0.43%
DP −4.51% −37.83% −2.62% −4.49% −3.41% −1.17% −3.00%
PE −0.23% 91.35% −0.74% −1.14% −1.28% 1.62% −0.33%
EY −0.81% 93.27% −1.02% −1.12% −0.80% 1.26% −0.32%
DY −5.84% −44.85% −3.33% −5.05% −3.75% −1.19% −3.79%
Payout −1.62% 37.96% −0.97% −1.06% −2.84% −0.95% −1.44%

Table 4 - Sharpe ratio Gains

Trading Strategies: Sharpe ratio Gains

This table presents the Sharpe ratio of the out-of-sample performance of trading strategies, allocating funds between risk-free and risky assets for each portfolio. The annualised Sharpe ratio is generated by multiplying the quarterly Sharpe ratio by the square root of the corresponding frequency (e.g. \(\sqrt{12}\) for monthly data).

# Table 4: reshape the stacked Sharpe-ratio-gain results into a
# 6-strategy by 7-portfolio matrix and render it with gt().
dt <- table4.df %>%
  filter(rowname %in% c(ratio_names, "sop_simple")) %>%
  select(SRG_annualised, rowname, portname)

# column-major fill: each matrix column holds one portfolio's six strategies
srg.wide <- as.data.frame(matrix(dt$SRG_annualised, byrow = FALSE, nrow = 6, ncol = 7))
colnames(srg.wide) <- unique(dt$portname)

srg.wide %>%
  mutate(Variable = unique(dt$rowname)) %>%
  as.tbl() %>%
  select(Variable, unique(dt$portname)) %>%
  gt(rowname_col = "Variable") %>%
  tab_header(title = "Table 4 - Trading Strategies: Sharpe ratio gains",
             subtitle = paste0(str_to_title(freq_name(freq = freq)), " data starts from ",
                               first(data_decompose$month) + k/freq, " and ends in ",
                               last(data_decompose$month), ".")) %>%
  fmt_number(columns = 2:8, decimals = 4)
Table 4 - Trading Strategies: Sharpe ratio gains
Quarterly data starts from Mar 1986 and ends in Dec 2019.
Market BH BL BM SH SL SM
sop_simple 0.1442 0.0859 0.2092 0.0276 0.0318 0.1530 0.0360
DP −0.3305 −0.1012 −0.1965 −0.3647 −0.1376 0.1413 −0.1963
PE 0.0359 0.1166 0.0093 −0.0747 −0.0546 0.2972 −0.0173
EY −0.0097 0.1310 −0.0226 −0.0730 −0.0343 0.2658 −0.0180
DY −0.4254 −0.1026 −0.2850 −0.4410 −0.1417 0.1286 −0.2293
Payout −0.0724 0.0282 −0.0865 −0.0311 −0.0905 −0.1985 −0.0796

Figure 6 - Sensitivity of Certainty Equivalent Gains relative to Risk-Aversion level

This figure presents the out-of-sample portfolio choice results at monthly frequency from bivariate predictive regressions and the SOP method with different levels of risk-aversion. To show that our previous results hold with respect to investors with different levels of risk aversion, we evaluate the changes in certainty equivalent gains with respect to the changes in the level of risk-aversion. The results of the trading strategy reported here are without trading restrictions (as in Table 5), allocating funds between the risk-free asset and the risky equity portfolio. The portfolio choice results are evaluated in the certainty equivalent return with relative risk-aversion coefficient \(\gamma \in \{0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5\}\). Risky equity portfolios include the market portfolio and six size and book-to-market equity sorted portfolios, BH, BM, BL, SH, SM and SL. The annualised certainty equivalent gain is the monthly certainty equivalent gain multiplied by twelve. The sample period is from February 1966 to December 2019 and the out-of-sample period starts in March 1986.

## 8. calculate the CEGs ----
## Sensitivity analysis: recompute the annualised SOP certainty equivalent
## gain for each portfolio over a grid of risk-aversion coefficients.
TABLE3_2 <- data.frame(matrix(nrow = 0, ncol = 0)) # for the CEGs with different risk-averse coefficients.

### 8.1(new) CEGs with different risk-averse coefficients ----
### This part will be recorded in TABLE3_2 (one row per portfolio, one
### column per risk-aversion level; labels attached after the loop).
for (id in names(all_predictions)) { # the sensitivity analysis of CEGs wrt. risk-averse coefficient
  print(id); timestamp()
  all_predictions.dt <- all_predictions[[id]] # read corresponding data
  
  ## expanding-window variance, lagged one period (same scheme as the main
  ## trading loop; no na.rm needed here since all_predictions was na.omit-ed)
  var.s <- rep(NA_real_, length(all_predictions.dt$r))
  for (t in 1:(length(var.s) - 1)) {
    var.s[t+1] <- var(all_predictions.dt$r[1:t])
  }
  all_predictions.dt$var.s <- var.s # historical sample variance
  all_predictions.dt <- all_predictions.dt %>%
    left_join(RF, by = "month") %>%
    rename(Rfree = t30ret) %>% # include the risk-free rate
    mutate(Rfree = lag(Rfree)) # choose the lag of that rate
  
  ## record the annualised CEG (certainty equivalent gains)
  table3.ceg_a <- c()
  
  for (risk.coef in seq(from = 0.5, to = 5, by = 0.5)) { # for different level of risk averse
    
    ## obtain the optimal weights on risky portfolios
    ## w = (E[R] - Rf) / (gamma * Var); forecasts are log returns, hence exp()-1
    weights_stock <- all_predictions.dt
    for (mu in c("r", "hist_mean", "sop_simple")) {
      weights_stock[[mu]] <- (exp(weights_stock[[mu]]) - 1 - weights_stock[["Rfree"]]) / (risk.coef * weights_stock[["var.s"]])
    }
    
    ## calculate OOS trading performance
    ## using upper and lower boundaries
    rp <- all_predictions.dt %>%
      select(month, r, Rfree) %>%
      mutate(r = exp(r) - 1) # convert back to simple returns
    limit.opt = F # the option to set limit or not
    limit.max <- 1.0 # the upper bound of the portfolio weight
    limit.min <- 0 # the lower bound of the portfolio weight
    for (mu in c("hist_mean", "sop_simple")) {
      w <- weights_stock[[mu]]
      if (limit.opt == TRUE) { # whether to pose a upper-lower bound
        w[w > limit.max & !is.na(w)] <- limit.max
        w[w < limit.min & !is.na(w)] <- limit.min
      }
      rp[[mu]] <- w * rp$r + (1 - w) * rp$Rfree # calculate the portfolio returns
    } # 'rp' has the risky-rf trading portfolio returns
    table3.mean <- apply(na.omit(rp[-1]), 2, mean)
    table3.var <- apply(na.omit(rp[-1]), 2, var)
    table3.cer <- (table3.mean - 1/2 * risk.coef * table3.var) # certainty equivalent returns
    table3.ceg <- (table3.cer - table3.cer["hist_mean"]) * freq # annualised certainty equivalent gains
    table3.ceg_a <- c(table3.ceg_a, table3.ceg[['sop_simple']]) # store the annualised CEGs values
  }
  # store the annualised CEGs for the same portfolio with different risk-averse coefficients.
  TABLE3_2 <- rbind.data.frame(TABLE3_2, table3.ceg_a)
}
## [1] "Market"
## ##------ Thu Mar 28 09:00:48 2024 ------##
## [1] "BH"
## ##------ Thu Mar 28 09:00:48 2024 ------##
## [1] "BL"
## ##------ Thu Mar 28 09:00:48 2024 ------##
## [1] "BM"
## ##------ Thu Mar 28 09:00:48 2024 ------##
## [1] "SH"
## ##------ Thu Mar 28 09:00:48 2024 ------##
## [1] "SL"
## ##------ Thu Mar 28 09:00:48 2024 ------##
## [1] "SM"
## ##------ Thu Mar 28 09:00:48 2024 ------##
colnames(TABLE3_2) <- seq(from = 0.5, to = 5, by = 0.5) # columns: risk-aversion grid
rownames(TABLE3_2) <- names(all_predictions) # rows: portfolio names
write.csv(TABLE3_2, file = "table3_2.csv") # export the sensitivity table

### draw line graphs for these and show the sensitivity.
### TABLE3_2 is 7 portfolios x 10 gamma values; flattening the matrix
### column-major pairs each gamma (repeated 7 times) with all portfolios.
dt_3.2 <- data.frame(
  CEGs = as.vector(as.matrix(TABLE3_2 * 100)), # convert to percentage points
  Risk_coef = rep(seq(0.5, 5, by = 0.5), each = 7),
  Portfolios = rep(names(all_predictions), times = 10)
)
# dt_3.2 # this is the dataset

# jpeg(filename = "table3_2.jpeg", width = 700, height = 500)
ggplot(dt_3.2, aes(x = Risk_coef, y = CEGs, group = Portfolios)) +
  geom_point(shape = 4) +
  geom_line(aes(linetype = Portfolios)) +
  scale_y_continuous(breaks = seq(-40, 20, by = 10), limits = c(-40, 20)) +
  # scale_y_continuous(breaks = seq(round(min(dt_3.2$CEGs)) - 5, round(max(dt_3.2$CEGs)) + 5, by = 10), limits = c(-40, 20)) +
  ylab(label = "Annualised Certainty Equivalent Gains (in %)") +
  xlab(label = "Risk-aversion Coefficient") +
  theme_linedraw()

# dev.off()

Table 5 - MSPE-adjusted Statistic

MSPE-adjusted Statistic

This table presents the MSPE-adjusted statistics, evaluating the statistical significance of the out-of-sample R-squared statistics of each model in the corresponding portfolio.

See Rapach et al., (2010) and Clark and West (2007) for the detailed procedure.

# Assemble Table 5: stack the per-portfolio MSPE-adjusted results stored in
# TABLE5 into one long data frame, then render it grouped by portfolio.
table5.df <- data.frame()
for (port.id in names(TABLE5)) {
  port.tab <- TABLE5[[port.id]]
  port.tab$rowname <- rownames(port.tab) # keep row labels as a column for gt
  port.tab$portname <- port.id
  colnames(port.tab)[4] <- "star" # significance-star column
  table5.df <- rbind.data.frame(table5.df, port.tab)
}

table5.subtitle <- paste0(str_to_title(freq_name(freq = freq)), " data starts from ",
                          first(data_decompose$month) + k/freq,
                          " and ends in ", last(data_decompose$month), ".")
table5.output <- table5.df %>%
  gt(rowname_col = "rowname", groupname_col = "portname") %>%
  fmt_percent(columns = vars(OOS_r.squared, mspe_pvalue), decimals = 2) %>%
  fmt_number(columns = vars(mspe_t), decimals = 4) %>%
  tab_header(title = "Table 5 - MSPE-adjusted Statistic",
             subtitle = table5.subtitle)

table5.output
Table 5 - MSPE-adjusted Statistic
Quarterly data starts from Mar 1986 and ends in Dec 2019.
OOS_r.squared mspe_t mspe_pvalue star
Market
DP −4.21% 0.5911 27.77%
PE 0.37% 1.0398 15.02%
EY −0.66% 0.8492 19.87%
DY −5.85% 0.3003 38.22%
Payout −3.74% −0.9195 82.02%
SOP 2.86% 2.2386 1.34% **
BH
DP −3.83% 1.0493 14.80%
PE 1.33% 1.6040 5.55% *
EY 1.63% 1.7737 3.92% **
DY −3.51% 1.0933 13.81%
Payout −0.63% −1.1507 87.40%
SOP −1.81% −0.2267 58.95%
BL
DP −1.19% 0.7942 21.43%
PE −0.39% 0.6001 27.47%
EY −0.91% 0.4710 31.92%
DY −2.16% 0.5885 27.86%
Payout −1.84% −0.9961 83.95%
SOP 3.56% 2.2220 1.40% **
BM
DP −4.33% −0.0801 53.18%
PE −1.59% 0.7952 21.40%
EY −2.19% 0.7915 21.50%
DY −5.29% −0.0746 52.97%
Payout −1.90% −0.0428 51.70%
SOP 0.18% 0.6945 24.43%
SH
DP −4.23% 0.9396 17.46%
PE −2.45% −0.5058 69.31%
EY −1.91% −0.4215 66.30%
DY −4.83% 0.7821 21.78%
Payout −3.13% −1.4778 92.91%
SOP −4.40% −0.4736 68.17%
SL
DP 1.30% 1.6045 5.55% *
PE 1.68% 1.5135 6.63% *
EY 1.24% 1.3020 9.76% *
DY 0.88% 1.4577 7.36% *
Payout −1.05% −1.0437 85.07%
SOP −4.49% −0.1798 57.12%
SM
DP −3.92% 0.8861 18.86%
PE −0.52% 0.7559 22.55%
EY −0.74% 0.5915 27.76%
DY −5.02% 0.7225 23.56%
Payout −2.04% −1.8312 96.53%
SOP −3.11% −0.2481 59.78%